## Kurtz and Brooks 2008 WP

library(foreign)
library(Amelia)

## Read the original replication data (Stata file) and take a first look
kb2008 <- read.dta(file = "KB2008 WP Rep Data.dta")
head(kb2008)
dim(kb2008)

## Remove the ID variables
kb2008$ifs <- NULL
kb2008$countryc <- NULL

## Remove the constituent items
kb2008$factor2 <- NULL
kb2008$factor1 <- NULL

## How many variables are left? 37, so no reduction is necessary
dim(kb2008)

## Imputation
## Note: there are already lags for factoradd and factordiff
## What is the average percentage of missing data across variables?
## Count the missing values in each column of `x`.
##
## Args:
##   x: a two-dimensional object (data frame or matrix).
## Returns:
##   An unnamed integer vector with one entry per column of `x`, holding the
##   number of NA entries found in that column.
NAs <- function(x) {
    ## is.na() gives a logical matrix for two-dimensional input, so the
    ## per-column NA counts are simply its column sums. This avoids apply(),
    ## which would first coerce a data frame to a single-type matrix.
    ## as.integer() drops the column names and keeps the integer return type.
    as.integer(colSums(is.na(x)))
}
## Per-column NA counts, then the mean share of missing values in percent
NAs(kb2008)
100 * mean(NAs(kb2008) / nrow(kb2008))

## Thus: 5 imputations

## Fix the RNG seed before imputing so the run can be repeated
set.seed(02138)
kb2008.out <- amelia(
    kb2008,
    m = 5,
    ts = "year",
    cs = "countrynumber",
    polytime = 3,
    lags = c(
        "partisan_final", "union9195", "partisan_union",
        "mfg80", "yrschool", "gini80"
    ),
    ## empirical prior scaled to 1% of the number of rows
    empri = 0.01 * nrow(kb2008)
)

## Save the imputed datasets in Stata format, not as separate files
write.amelia(
    obj = kb2008.out,
    separate = FALSE,
    file.stem = "KB2008 WP Imp Data",
    format = "dta"
)
